##############################
# STATE REACH AND DEVELOPMENT // PSRM // Replication
#
# Leads analysis
# Sourced from "code/analysis_main.R"
#
##############################

# MAIN: FULL SAMPLE
stub <- "taba8_leads"


# Make logs
longterm.leads <- c(6:10)
shortterm.leads <- c(1:5)
data.ls <- lapply(data.ls, function(df){

  # Load leads, generate logged versions
  these.vars <- paste0("natcap.le", c(longterm.leads, shortterm.leads))
  these.vars <- these.vars[these.vars %in% colnames(df)]
  if(length(these.vars) == 0){return(df)}
  df[,paste0(these.vars, ".ln")] <- log(1 + df[,these.vars])
  these.vars <- paste0("regcap.le", c(longterm.leads, shortterm.leads))
  these.vars <- these.vars[these.vars %in% colnames(df)]
  df[,paste0(these.vars, ".ln")] <- log(1 + df[,these.vars])
  
  # Make average childhood state cap for education outcomes
  if("educ.prim" %in% colnames(df)){
    df$natcap.ln.avg <- log(1+ rowMeans(df[, c("natcap", paste0("natcap.le", c(1:(min(longterm.leads)-1))))]))
    df$regcap.ln.avg <- log(1+ rowMeans(df[, c("regcap", paste0("regcap.le", c(1:(min(longterm.leads)-1))))]))
  }
  df
})

# Prepare
add_leads <- function(dn){
  if(dn == "educ"){
    list(c(paste0("natcap.le",c( longterm.leads),".ln"),
           paste0("regcap.le",c( longterm.leads),".ln")))
  } else {
    list(c(paste0("natcap.le",c(shortterm.leads),".ln"),
           paste0("regcap.le",c(shortterm.leads),".ln")))
    
  }
}


# Estimate
model.ls <- unlist(lapply(main.data.names, function(dn){
  lapply(add_leads(dn), function(av){
    temp.df <-  data.ls[[dn]]
    if(dn == "educ"){
      temp.df[, treat.vars] <- temp.df[, c("natcap.ln.avg", "regcap.ln.avg")]
    } 
    form <- make_form(dv = get_depvar(dn), expl = c(treat.vars, av[av %in% colnames(data.ls[[dn]])], get_contr(dn)), 
                      fe = fe.global, 
                      iv = "0",
                      se = cluster.global )
    felm(form,  data = temp.df, keepX = F, keepCX = F)
  })
}), recursive = F)
model.ls.full.leads <- model.ls

# ... sum leads
library(car)
count <- 0
sum.leads <- lapply(model.ls, function(m){
  count <<- count + 1
  # print("next")
  lapply(c("natcap","regcap"), function(v){
    # print(v)
    vars <- rownames(m$coefficients)[grepl(paste0(v, ".le"), rownames(m$coefficients))]
    if(count == 1){
      vars <- vars[grepl(paste(longterm.leads, collapse = "|"), vars)]
    }
    dm <- deltaMethod(m, paste(vars, collapse = " + "), vcov. = m$clustervcv)
    wt <- lfe::waldtest(m, as.formula(paste("~", paste(vars, collapse = " + "))), type = "cluster")
    dm$pvalue <- wt["p"]
    dm
  })
})
sum.leads.full <- sum.leads

# ... sum education main effect (t=0...t=5)
sum.educ.main <- lapply(c("natcap","regcap"), function(v){
  m <- model.ls.full.leads[[1]]
  print(v)
  vars <- c(paste0(v, ".ln"),rownames(m$coefficients)[grepl(paste0(v, ".le"), rownames(m$coefficients))])
  vars <- vars[!grepl(paste(longterm.leads, collapse = "|"), vars)]
  dm <- deltaMethod(m, paste(vars, collapse = " + "), vcov. = m$clustervcv)
  wt <- lfe::waldtest(m, as.formula(paste("~", paste(vars, collapse = " + "))), type = "cluster")
  dm$pvalue <- wt["p"]
  dm
})
sum.educ.main.full <- sum.educ.main


#... sum of betas
sum.betas <- lapply(model.ls, function(m){
  if("natcap.ln.avg" %in% rownames(m$coefficients)){
    treat.vars.temp <- c("natcap.ln.avg", "regcap.ln.avg")
    dm <- deltaMethod(m, paste(treat.vars.temp, collapse = " + "), vcov. = m$clustervcv)
    wt <- lfe::waldtest(m, as.formula(paste("~", paste(treat.vars.temp, collapse = " + "))), type = "cluster")
  } else {
    dm <- deltaMethod(m, paste(treat.vars, collapse = " + "), vcov. = m$clustervcv)
    wt <- lfe::waldtest(m, as.formula(paste("~", paste(treat.vars, collapse = " + "))), type = "cluster")
  }
  
  dm$pvalue <- wt["p"]
  dm
})


# ... prepare
add.lines <- list(latex.addline_nomc("$\\beta_1 + \\beta_2$:", 
                                     unlist(lapply(sum.betas, function(x){
                                       paste0(round(x[["Estimate"]], 3), 
                                              ifelse(x[["pvalue"]] < .01, "^{***}",ifelse(x[["pvalue"]] < .05, "^{**}",ifelse(x[["pvalue"]] < .1, "^{*}","" ) ) ))
                                     }))),
                  latex.addline_nomc("", unlist(lapply(sum.betas, function(x) paste0("(",round(x[["SE"]], 3), ")")))),
                  latex.addline_nomc("Sum of leads (nat. cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[1]]$Estimate, 3), ifelse(x[[1]]$pvalue < .01, "^{***}",ifelse(x[[1]]$pvalue < .05, "^{**}",ifelse(x[[1]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[1]]$SE, 3), ")")))),
                  latex.addline_nomc("Sum of leads (reg cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[2]]$Estimate, 3), ifelse(x[[2]]$pvalue < .01, "^{***}",ifelse(x[[2]]$pvalue < .05, "^{**}",ifelse(x[[2]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[2]]$SE, 3), ")")))),
                  latex.addline("Time to cap.$_{t+1,...,t+5}$:", c("yes", "yes", "yes")),
                  latex.addline("Point FE:", rep("yes", length(model.ls))),
                  latex.addline("Country-year FE:", rep("yes", length(model.ls))),
                  latex.addline("Survey FE:", rep(c("yes","yes","--"), 1)),
                  latex.addline("Controls:", rep(c("yes","yes","--"), 1)),
                  latex.mean.dv(model.ls))
add.lines[[2]][[length(add.lines[[2]])]] <- paste(add.lines[[2]][[length(add.lines[[2]])]], "\\\\ \\hline \\\\[-1.8ex] ")
add.lines[[6]][[length(add.lines[[6]])]] <- paste(add.lines[[6]][[length(add.lines[[6]])]], "\\\\ \\hline \\\\[-1.8ex] ")

# ... save main
fileConn <- file(file.path(tab.path, paste0(stub,".tex")))
writeLines(stargazer(model.ls,
                     title="Changes in time to national/regional capital and local development: Controlling for leads",
                     keep = c(treat.vars),
                     multicolumn = F,
                     column.labels = names(unlist(lapply(main.data.names, get_depvar))),
                     column.separate = rep(1, length(main.data.names)), 
                     dep.var.caption = "",dep.var.labels = rep("", length(model.ls)),
                     covariate.labels = c(names(treat.vars)),
                     font.size = "scriptsize",
                     notes.align = "c", label=stub, align =T,
                     add.lines = add.lines,digits = 3, intercept.top = T,intercept.bottom = F,
                     omit.stat = c("rsq","res.dev","ser"),
                     notes = latex.notes.comb(.85), notes.label = "", notes.append = F), 
           fileConn)
close(fileConn)

# Save coefs
all.coef.list[[stub]] <- extract_coef(model.ls)

# LEAD TREATMENT INDICATORS, MIGRANTS AND NON-MIGRANTS ######
stub <- "taba9_leads_migrant"


# Make logs of leads
ir.mig.df <- lapply(list(ir.mig.df), function(df){
  
  # Load leads
  these.vars <- paste0("natcap.le", c(longterm.leads, shortterm.leads))
  these.vars <- these.vars[these.vars %in% colnames(df)]
  if(length(these.vars) == 0){return(df)}
  df[,paste0(these.vars, ".ln")] <- log(1 + df[,these.vars])
  these.vars <- paste0("regcap.le", c(longterm.leads, shortterm.leads))
  these.vars <- these.vars[these.vars %in% colnames(df)]
  df[,paste0(these.vars, ".ln")] <- log(1 + df[,these.vars])
  
  # Make childhood average
  df$natcap.ln.avg <- log(1+ rowMeans(df[, c("natcap", paste0("natcap.le", c(1:(min(longterm.leads)-1))))]))
  df$regcap.ln.avg <- log(1+ rowMeans(df[, c("regcap", paste0("regcap.le", c(1:(min(longterm.leads)-1))))]))
  df
})[[1]]

# Estimate
model.ls <- unlist(lapply(c("educ","infmort"), function(dn){
  lapply(add_leads(dn), function(av){
    if(dn == "educ"){
      temp.df <- ir.mig.df
      temp.df[, treat.vars] <- temp.df[, c("natcap.ln.avg", "regcap.ln.avg")]
    } else {
      temp.df <-  data.ls[[dn]]
    }
    temp.df[,paste0(c(treat.vars, av), ".mig")] <- temp.df[,c(treat.vars, av)] * temp.df$is.migrant
    temp.df[,paste0(c(treat.vars, av))] <- temp.df[,c(treat.vars, av)] * as.numeric((temp.df$is.migrant == 0))
    form <- make_form(dv = get_depvar(dn), expl = c(c(treat.vars, av), "is.migrant", paste0(c(treat.vars, av), ".mig"), 
                                                    get_contr(dn)), 
                      fe = fe.global, 
                      iv = "0",
                      se = cluster.global )
    felm(form,  data = temp.df, keepX = F, keepCX = F)
  })
}), recursive = F)
model.ls.mig.leads <- model.ls

# ... sum leads
count <- 0
sum.leads <- lapply(model.ls, function(m){
  count <<- count + 1
  print("next")
  unlist(lapply(c(0,1), function(mig){
    lapply(c("natcap","regcap"), function(v){
      print(v)
      vars <- rownames(m$coefficients)[grepl(paste0(v, ".le"), rownames(m$coefficients))]
      vars <- vars[as.numeric(grepl("mig", vars, fixed = T)) == mig]
      if(count == 1){
        vars <- vars[grepl(paste(longterm.leads, collapse = "|"), vars)]
      }
      dm <- deltaMethod(m, paste(vars, collapse = " + "), vcov. = m$clustervcv)
      wt <- lfe::waldtest(m, as.formula(paste("~", paste(vars, collapse = " + "))), type = "cluster")
      dm$pvalue <- wt["p"]
      dm
    })
  }), recursive = F)
})
sum.leads.mig <- sum.leads

#... sum of betas
sum.betas <- lapply(model.ls, function(m){
  if("natcap.ln.avg" %in% rownames(m$coefficients)){
    treat.vars.temp <- c("natcap.ln.avg", "regcap.ln.avg")
    dm <- deltaMethod(m, paste(treat.vars.temp, collapse = " + "), vcov. = m$clustervcv)
    wt <- lfe::waldtest(m, as.formula(paste("~", paste(treat.vars.temp, collapse = " + "))), type = "cluster")
  } else {
    dm <- deltaMethod(m, paste(treat.vars, collapse = " + "), vcov. = m$clustervcv)
    wt <- lfe::waldtest(m, as.formula(paste("~", paste(treat.vars, collapse = " + "))), type = "cluster")
  }
  
  dm$pvalue <- wt["p"]
  dm
})

# ... prepare
add.lines <- list(latex.addline_nomc("Non-migrants: $\\beta_1 + \\beta_2$:", 
                                     unlist(lapply(sum.betas, function(x){
                                       paste0(round(x[["Estimate"]], 3), 
                                              ifelse(x[["pvalue"]] < .01, "^{***}",ifelse(x[["pvalue"]] < .05, "^{**}",ifelse(x[["pvalue"]] < .1, "^{*}","" ) ) ))
                                     }))),
                  latex.addline_nomc("", unlist(lapply(sum.betas, function(x) paste0("(",round(x[["SE"]], 3), ")")))),
                  latex.addline("\\emph{Non-migrant leads:}", c("", "", "")),
                  latex.addline_nomc("Sum of leads (nat. cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[1]]$Estimate, 3), ifelse(x[[1]]$pvalue < .01, "^{***}",ifelse(x[[1]]$pvalue < .05, "^{**}",ifelse(x[[1]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[1]]$SE, 3), ")")))),
                  latex.addline_nomc("Sum of leads (reg. cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[2]]$Estimate, 3), ifelse(x[[2]]$pvalue < .01, "^{***}",ifelse(x[[2]]$pvalue < .05, "^{**}",ifelse(x[[2]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[2]]$SE, 3), ")")))),
                  latex.addline("\\emph{Migrant leads:}", c("", "", "")),
                  latex.addline_nomc("Sum of leads (nat. cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[3]]$Estimate, 3), ifelse(x[[3]]$pvalue < .01, "^{***}",ifelse(x[[3]]$pvalue < .05, "^{**}",ifelse(x[[3]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[3]]$SE, 3), ")")))),
                  latex.addline_nomc("Sum of leads (reg. cap.):", unlist(lapply(sum.leads, function(x){
                    paste0(round(x[[4]]$Estimate, 3), ifelse(x[[4]]$pvalue < .01, "^{***}",ifelse(x[[4]]$pvalue < .05, "^{**}",ifelse(x[[4]]$pvalue < .1, "^{*}","" ) ) ))}))),
                  latex.addline_nomc("", unlist(lapply(sum.leads, function(x) paste0("(",round(x[[4]]$SE, 3), ")")))),
                  latex.addline("Time to cap.$_{t+1,...,t+5}$:", c("yes", "yes")),
                  latex.addline("Point FE:", rep("yes", length(model.ls))),
                  latex.addline("Country-year FE:", rep("yes", length(model.ls))),
                  latex.addline("Survey FE:", rep("yes", length(model.ls))),
                  latex.addline("Controls:", rep(c("yes","yes"), 1)),
                  latex.mean.dv(model.ls))
add.lines[[2]][[length(add.lines[[2]])]] <- paste(add.lines[[2]][[length(add.lines[[2]])]], "\\\\ \\hline \\\\[-1.8ex] ")
add.lines[[12]][[length(add.lines[[12]])]] <- paste(add.lines[[12]][[length(add.lines[[12]])]], "\\\\ \\hline \\\\[-1.8ex] ")


# ... save main
fileConn <- file(file.path(tab.path, paste0(stub,".tex")))
writeLines(stargazer(model.ls,
                     title="Changes in time to national/regional capital and local development: Controlling for leads, migrants and non-migrants",
                     keep = c("is.migrant",treat.vars),
                     multicolumn = F,
                     column.labels = c(names(unlist(lapply(main.data.names, get_depvar)))),
                     column.separate = rep(1, length(main.data.names)), 
                     dep.var.caption = "",dep.var.labels = rep("", length(model.ls)),
                     covariate.labels = c("Migrant", paste0("Non-migrants: ",names(treat.vars)),
                                          paste0("Migrants: ",names(treat.vars))),
                     font.size = "scriptsize",
                     notes.align = "c", label=stub, align =T,
                     add.lines = add.lines,digits = 3, intercept.top = T,intercept.bottom = F,
                     omit.stat = c("rsq","res.dev","ser"),
                     notes = latex.notes.comb(.85), notes.label = "", notes.append = F), 
           fileConn)
close(fileConn)

# Save coefs
all.coef.list[[stub]] <- extract_coef(model.ls)


# FIGURE A15 PLOT ALL LEADS ###################

coef.df <- NULL

# ... Baseline
coef.df <- rbind(coef.df, do.call(rbind,lapply(1:length(main.data.names), function(i){
  do.call(rbind, lapply(1:2, function(j){
    v <- c("natcap","regcap")[j]
    m <- all.coef.list[["tab1_main"]][[i]]
    data.frame(outcome = names(get_depvar(main.data.names[i])),
               var = c("natcap","regcap")[j],
               type = c("Baseline"),
               coef = c(m$coef[paste0(v, ".ln"),1]),
               se = c(m$clustervcv[paste0(v, ".ln"),paste0(v, ".ln")]^.5),
               stringsAsFactors = F)
  }))
})))

# ... full sample
coef.df <- rbind(coef.df, do.call(rbind,lapply(1:length(main.data.names), function(i){
  do.call(rbind, lapply(1:2, function(j){
    v <- c("natcap","regcap")[j]
    m <- all.coef.list[["taba8_leads"]][[i]]
    pos <- which(grepl(v, rownames(m$coef)))
    data.frame(outcome = names(get_depvar(main.data.names[i])),
               var = c("natcap","regcap")[j],
               type = c("Main estimate", paste0("Lead ", 1:5),"Sum of leads"),
               coef = c(m$coef[pos,1],  sum.leads.full[[i]][[j]]$Estimate),
               se = c(diag(m$clustervcv[pos,pos])^.5, sum.leads.full[[i]][[j]]$SE),
               stringsAsFactors = F)
  }))
})))


# ... Prepare
coef.df$var <- ifelse(coef.df$var == "regcap", "... to reg. capital", "... to nat. capital")
coef.df$var <- factor(coef.df$var, levels = unique(coef.df$var), ordered = T)
coef.df$outcome <- factor(coef.df$outcome, levels = unique(coef.df$outcome), ordered = T)
coef.df$type.fac <- factor(coef.df$type , levels = unique(coef.df$type), ordered = T)
coef.df$xpos <- as.numeric(coef.df$type.fac)
coef.df$xpos[coef.df$outcome == "Light/capita (log)"] <- coef.df$xpos[coef.df$outcome == "Light/capita (log)"] + .15
coef.df$xpos[coef.df$type == "Baseline"] <- 1

# ... plot

png(file.path(fig.path, paste0("figa15_leads_fullonly.png")), width = 7, height = 4, units = "in", res = 300)
p <- ggplot(coef.df, aes(x = xpos, y = coef)) +
  geom_hline(yintercept = 0, lty = 2, col = "grey") +
  geom_vline(xintercept = 1.5, lty = 2, col = "grey") +
  geom_point() +
  geom_segment(aes(y = coef + se * 1.96,
                   yend = coef - se * 1.96,
                   x = xpos, xend = xpos)) +
  facet_wrap(var ~ outcome , scales = "free_y") +
  xlab("") + 
  ylab("Marginal effect of time to\nnat./reg. capital") +
  scale_x_continuous(breaks = unique(as.numeric(coef.df$type.fac)),
                     labels = unique(coef.df$type.fac)) +
  theme_minimal() +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
print(p)
dev.off()


